/* $Id: GC_Assembly.cpp 666257 2023-04-24 15:24:30Z mozese2 $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  .......
 *
 * File Description:
 *   .......
 *
 * Remark:
 *   This code was originally generated by application DATATOOL
 *   using the following specifications:
 *   'genome_collection.asn'.
 */

// standard includes
#include <ncbi_pch.hpp>

// generated includes
#include <objects/genomecoll/GC_Assembly.hpp>
#include <objects/genomecoll/GC_AssemblyUnit.hpp>
#include <objects/genomecoll/GC_AssemblySet.hpp>
#include <objects/genomecoll/GC_AssemblyDesc.hpp>
#include <objects/genomecoll/GC_Replicon.hpp>
#include <objects/genomecoll/GC_Sequence.hpp>
#include <objects/genomecoll/GC_TaggedSequences.hpp>

#include <objects/seq/Seq_descr.hpp>
#include <objects/seq/Seqdesc.hpp>
#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqfeat/BioSource.hpp>
#include <objects/seqfeat/Org_ref.hpp>

#include <serial/serial.hpp>
#include <serial/iterator.hpp>

// generated classes

BEGIN_NCBI_SCOPE

BEGIN_objects_SCOPE // namespace ncbi::objects::

// constructor
// The target-set up-link starts out as NULL; it is assigned later by
// CreateHierarchy().
CGC_Assembly::CGC_Assembly(void)
: m_TargetSet(NULL)
{
}


// destructor
// Nothing to release explicitly here.
CGC_Assembly::~CGC_Assembly(void)
{
}

/// Return the id list of whichever variant this assembly holds
/// (assembly set or unit).
///
/// @throws CException (eUnknown) when the choice is neither variant.
const list<CRef<CDbtag>>& CGC_Assembly::x_GetId() const
{
    if (!IsAssembly_set()) {
        if (!IsUnit()) {
            NCBI_THROW(CException, eUnknown, "unhandled GC-Assembly choice");
        }
        return GetUnit().GetId();
    }
    return GetAssembly_set().GetId();
}

int CGC_Assembly::GetReleaseId() const
{
    int release_id = 0;
    typedef list<CRef<CDbtag>> TId;
    ITERATE (TId, id_it, x_GetId()) {
        if ((*id_it)->GetDb() == "GenColl"  &&
            (*id_it)->GetTag().IsId()) {
            release_id = (*id_it)->GetTag().GetId();
            break;
        }
    }
    return release_id;
}


/// Return the string tag of the first id whose database is "GenColl" and
/// whose tag is a string; an empty string when no such id exists.
string CGC_Assembly::GetAccession() const
{
    for (const CRef<CDbtag>& dbtag : x_GetId()) {
        const bool is_gencoll_str =
            dbtag->GetDb() == "GenColl"  &&  dbtag->GetTag().IsStr();
        if (is_gencoll_str) {
            return dbtag->GetTag().GetStr();
        }
    }
    return string();
}

/// Return the accession when one is available, otherwise fall back to the
/// submitter-provided id (which may itself be empty).
string CGC_Assembly::GetBestIdentifier() const
{
    const string accession = GetAccession();
    if (accession.empty()) {
        return x_GetSubmitterId();
    }
    return accession;
}

/// Return the string tag of the first id whose database is "submitter" and
/// whose tag is a string; an empty string when no such id exists.
string CGC_Assembly::x_GetSubmitterId() const
{
    for (const CRef<CDbtag>& dbtag : x_GetId()) {
        const bool is_submitter_str =
            dbtag->GetDb() == "submitter"  &&  dbtag->GetTag().IsStr();
        if (is_submitter_str) {
            return dbtag->GetTag().GetStr();
        }
    }
    return string();
}

/// Return the descriptor of whichever variant this assembly holds
/// (assembly set or unit).
///
/// @throws CException (eUnknown) when the choice is neither variant.
const CGC_AssemblyDesc& CGC_Assembly::GetDesc() const
{
    if (IsAssembly_set()) {
        return GetAssembly_set().GetDesc();
    } else if (IsUnit()) {
        return GetUnit().GetDesc();
    } else {
        NCBI_THROW(CException, eUnknown,
                   "assembly is neither unit nor set");
    }
}


/// Return the name stored in this assembly's descriptor, or an empty string
/// when the choice is neither set nor unit, or the name cannot be read.
string CGC_Assembly::GetName() const
{
    const CGC_AssemblyDesc* descriptor = NULL;
    if (IsAssembly_set()) {
        descriptor = &GetAssembly_set().GetDesc();
    } else if (IsUnit()) {
        descriptor = &GetUnit().GetDesc();
    }

    if (descriptor == NULL  ||  !descriptor->CanGetName()) {
        return kEmptyStr;
    }
    return descriptor->GetName();
}


/// Return the display name: GetName() for an assembly set, the unit's own
/// display name for a unit, and an empty string otherwise.
string CGC_Assembly::GetDisplayName() const
{
    if (IsAssembly_set()) {
        return GetName();
    }
    if (!IsUnit()) {
        return kEmptyStr;
    }
    return GetUnit().GetDisplayName();
}


/// Return the descriptor's file-safe name when it is set; otherwise derive
/// one from GetName() by replacing every space with an underscore.
string CGC_Assembly::GetFileSafeName() const
{
    const CGC_AssemblyDesc& descriptor = GetDesc();
    if (descriptor.IsSetFilesafe_name()) {
        return descriptor.GetFilesafe_name();
    }
    return NStr::Replace(GetName(), " ", "_");
}


/// Return the file-safe display name: GetFileSafeName() for an assembly
/// set, the unit's own file-safe display name for a unit, and an empty
/// string otherwise.
string CGC_Assembly::GetFileSafeDisplayName() const
{
    if (IsAssembly_set()) {
        return GetFileSafeName();
    }
    if (!IsUnit()) {
        return kEmptyStr;
    }
    return GetUnit().GetFileSafeDisplayName();
}


/// Return the taxonomy id taken from the first source descriptor found in
/// this assembly's descriptor; ZERO_TAX_ID when there is no descriptor, no
/// descr list, or no source entry.
TTaxId CGC_Assembly::GetTaxId() const
{
    const CGC_AssemblyDesc* descriptor = NULL;
    if (IsAssembly_set()) {
        descriptor = &GetAssembly_set().GetDesc();
    } else if (IsUnit()) {
        descriptor = &GetUnit().GetDesc();
    }

    if (descriptor == NULL  ||  !descriptor->IsSetDescr()) {
        return ZERO_TAX_ID;
    }

    ITERATE (CGC_AssemblyDesc::TDescr::Tdata, entry_it,
             descriptor->GetDescr().Get()) {
        if ((*entry_it)->IsSource()) {
            // Only the first source descriptor is consulted.
            return (*entry_it)->GetSource().GetOrg().GetTaxId();
        }
    }
    return ZERO_TAX_ID;
}


bool CGC_Assembly::IsRefSeq() const
{
    CConstRef<CGC_AssemblyDesc> desc;
    if (IsAssembly_set()) {
        desc.Reset(&GetAssembly_set().GetDesc());
    } else if (IsUnit()) {
        desc.Reset(&GetUnit().GetDesc());
    }

    if (desc  &&  desc->IsSetRelease_type()) {
        return (desc->GetRelease_type() ==  CGC_AssemblyDesc::eRelease_type_refseq);
    }
    return false;
}


bool CGC_Assembly::IsGenBank() const
{
    CConstRef<CGC_AssemblyDesc> desc;
    if (IsAssembly_set()) {
        desc.Reset(&GetAssembly_set().GetDesc());
    } else if (IsUnit()) {
        desc.Reset(&GetUnit().GetDesc());
    }

    if (desc  &&  desc->IsSetRelease_type()) {
        return (desc->GetRelease_type() ==  CGC_AssemblyDesc::eRelease_type_genbank);
    }
    return false;
}

bool CGC_Assembly::IsOrganelle() const
{
    return GetName() == "non-nuclear";
}

/// Return the class of the contained unit, or
/// CGC_AssemblyUnit::eClass_other when this assembly is not a unit.
CGC_AssemblyUnit::TClass CGC_Assembly::GetUnitClass() const
{
    if (!IsUnit()) {
        return CGC_AssemblyUnit::eClass_other;
    }
    return GetUnit().GetClass();
}

/////////////////////////////////////////////////////////////////////////////

/// Retrieve a list of all assembly units contained in this assembly.
///
/// A unit yields just itself.  Anything else is treated as an assembly set:
/// the units of the primary assembly come first, followed by the units of
/// each additional assembly in stored order.
CGC_Assembly::TAssemblyUnits CGC_Assembly::GetAssemblyUnits() const
{
    TAssemblyUnits collected;

    if (IsUnit()) {
        collected.push_back(CConstRef<CGC_AssemblyUnit>(&GetUnit()));
        return collected;
    }

    TAssemblyUnits nested =
        GetAssembly_set().GetPrimary_assembly().GetAssemblyUnits();
    collected.insert(collected.end(), nested.begin(), nested.end());

    if (GetAssembly_set().IsSetMore_assemblies()) {
        ITERATE (CGC_AssemblySet::TMore_assemblies, sub_it,
                 GetAssembly_set().GetMore_assemblies()) {
            nested = (*sub_it)->GetAssemblyUnits();
            collected.insert(collected.end(), nested.begin(), nested.end());
        }
    }
    return collected;
}


/////////////////////////////////////////////////////////////////////////////

/// Return the full assemblies reachable from this object.
///
/// - Not an assembly set: the distinct full assemblies of the assembly
///   units, in first-seen order.
/// - Set of type assembly_set: the primary assembly followed by every
///   additional assembly.
/// - Set of type full_assembly: this object itself.
/// - Any other set type: an empty list.
CGC_Assembly::TFullAssemblies CGC_Assembly::GetFullAssemblies() const
{
    TFullAssemblies result;

    if (!IsAssembly_set()) {
        const TAssemblyUnits units = GetAssemblyUnits();
        // Tracks which full assemblies were already appended.
        set< CConstRef<CGC_Assembly> > seen;
        ITERATE (TAssemblyUnits, unit_it, units) {
            CConstRef<CGC_Assembly> full = (*unit_it)->GetFullAssembly();
            const bool first_time = seen.insert(full).second;
            if (first_time) {
                result.push_back(full);
            }
        }
        return result;
    }

    const CGC_AssemblySet& asm_set = GetAssembly_set();
    switch (asm_set.GetSet_type()) {
    case CGC_AssemblySet::eSet_type_full_assembly:
        result.push_back(CConstRef<CGC_Assembly>(this));
        break;

    case CGC_AssemblySet::eSet_type_assembly_set:
        /// each sub-assembly is its own entity and acts as its own root
        result.push_back(
            CConstRef<CGC_Assembly>(&asm_set.GetPrimary_assembly()));
        if (asm_set.IsSetMore_assemblies()) {
            ITERATE (CGC_AssemblySet::TMore_assemblies, sub_it,
                     asm_set.GetMore_assemblies()) {
                result.push_back(*sub_it);
            }
        }
        break;

    default:
        break;
    }
    return result;
}


/////////////////////////////////////////////////////////////////////////////

struct SBestSequence {
    bool operator()(const CConstRef<CGC_Sequence> &seq1,
                    const CConstRef<CGC_Sequence> &seq2) const
    {
        /// Prefer sequence from reference full assembly
        if (seq1->GetFullAssembly()->IsTargetSetReference() &&
           !seq2->GetFullAssembly()->IsTargetSetReference())
        {
            return true;
        }
        if (seq2->GetFullAssembly()->IsTargetSetReference() &&
           !seq1->GetFullAssembly()->IsTargetSetReference())
        {
            return false;
        }

        /// Prefer sequence from primary unit
        if (seq1->GetAssemblyUnit()->IsPrimaryUnit() &&
           !seq2->GetAssemblyUnit()->IsPrimaryUnit())
        {
            return true;
        }
        if (seq2->GetAssemblyUnit()->IsPrimaryUnit() &&
           !seq1->GetAssemblyUnit()->IsPrimaryUnit())
        {
            return false;
        }

        /// Prefer top-level sequence
        if (seq1->HasRole(eGC_SequenceRole_top_level) &&
           !seq2->HasRole(eGC_SequenceRole_top_level))
        {
            return true;
        }
        if (seq2->HasRole(eGC_SequenceRole_top_level) &&
           !seq1->HasRole(eGC_SequenceRole_top_level))
        {
            return false;
        }

        /// Prefer scaffold
        if (seq1->HasRole(eGC_SequenceRole_scaffold) &&
           !seq2->HasRole(eGC_SequenceRole_scaffold))
        {
            return true;
        }
        return false;
    }
};

/// Look up a single sequence by id, building the index first if it is
/// still empty.
///
/// @param id           Seq-id handle to look up.
/// @param find_option  What to do when several sequences share the id:
///                     eEnforceSingle throws, eChooseBest picks the minimum
///                     under SBestSequence, any other value takes the first
///                     entry.
/// @return The selected sequence, or a null reference when the id is not
///         indexed.
/// @throws CException (eUnknown) for eEnforceSingle with multiple matches.
CConstRef<CGC_Sequence> CGC_Assembly::Find(const CSeq_id_Handle& id,
                                           EFindSeqOption find_option) const
{
    if (m_SequenceMap.empty()) {
        const_cast<CGC_Assembly&>(*this).CreateIndex();
    }

    const TSequenceIndex::const_iterator entry = m_SequenceMap.find(id);
    if (entry == m_SequenceMap.end()) {
        return CConstRef<CGC_Sequence>();
    }
    if (entry->second.size() == 0) {
        return CConstRef<CGC_Sequence>();
    }

    const bool ambiguous = entry->second.size() > 1;
    if (ambiguous  &&  find_option == eEnforceSingle) {
        NCBI_THROW(CException, eUnknown,
                   "multiple sequences found in assembly: " +
                   id.GetSeqId()->AsFastaString());
    }
    if (ambiguous  &&  find_option == eChooseBest) {
        return *min_element(entry->second.begin(), entry->second.end(),
                            SBestSequence());
    }

    /// Single match, or arbitrarily take first one on list
    return entry->second.front();
}

void CGC_Assembly::Find(const CSeq_id_Handle& id,
                        TSequenceList& sequences) const
{
    if (m_SequenceMap.empty()) {
        const_cast<CGC_Assembly&>(*this).CreateIndex();
    }
    sequences.clear();
    TSequenceIndex::const_iterator it = m_SequenceMap.find(id);
    if (it != m_SequenceMap.end()) {
        sequences = it->second;
    }
}

void CGC_Assembly::GetRepliconTypeLocRole(const CSeq_id_Handle& id, string& type, string& location, set<int>& role) const
{
    CGC_Assembly::TSequenceList seqs;
    Find(id, seqs);

    ITERATE(CGC_Assembly::TSequenceList, its, seqs)
    {
        if(type.empty() || location.empty())
        {
        CConstRef<CGC_Replicon> repl((*its)->GetReplicon());
        if(repl)
        {
            type = repl->GetMoleculeType();
            location = repl->GetMoleculeLocation();
        }
        }

        if((*its)->HasRole(eGC_SequenceRole_chromosome     ))    role.insert(eGC_SequenceRole_chromosome);
        if((*its)->HasRole(eGC_SequenceRole_scaffold       ))    role.insert(eGC_SequenceRole_scaffold);
        if((*its)->HasRole(eGC_SequenceRole_pseudo_scaffold))    role.insert(eGC_SequenceRole_pseudo_scaffold);
    }
}

/////////////////////////////////////////////////////////////////////////////

/// Pre-write hook; intentionally does nothing.
void CGC_Assembly::PreWrite() const
{
}

/// Post-read hook: rebuilds the up-links of the assembly hierarchy by
/// calling CreateHierarchy() with its default argument.
void CGC_Assembly::PostRead()
{
    CreateHierarchy();
}


/// Generate the up-links (target set, owning assembly, unit, replicon,
/// parent sequence) for this assembly and everything below it.
///
/// @param target_set  The target set to record; when NULL, this object
///                    becomes the target set.
/// @throws CException (eUnknown) for an assembly set whose set type is
///         neither assembly_set nor full_assembly.
void CGC_Assembly::CreateHierarchy(CGC_Assembly *target_set)
{
    CGC_Assembly* const top = (target_set == NULL) ? this : target_set;
    m_TargetSet = top;

    if (IsUnit()) {
        x_Index(*this);
        return;
    }
    if (!IsAssembly_set()) {
        return;
    }

    CGC_AssemblySet& asm_set = SetAssembly_set();
    switch (asm_set.GetSet_type()) {
    case CGC_AssemblySet::eSet_type_assembly_set:
        /// each sub-assembly is its own entity and acts as its own root
        asm_set.SetPrimary_assembly().CreateHierarchy(top);
        if (asm_set.IsSetMore_assemblies()) {
            NON_CONST_ITERATE (CGC_AssemblySet::TMore_assemblies, sub_it,
                               asm_set.SetMore_assemblies()) {
                (*sub_it)->CreateHierarchy(top);
            }
        }
        break;

    case CGC_AssemblySet::eSet_type_full_assembly: {
        /// we are the root
        CGC_Assembly& primary = asm_set.SetPrimary_assembly();
        primary.m_TargetSet = top;
        primary.x_Index(*this);
        if (asm_set.IsSetMore_assemblies()) {
            NON_CONST_ITERATE (CGC_AssemblySet::TMore_assemblies, sub_it,
                               asm_set.SetMore_assemblies()) {
                (*sub_it)->m_TargetSet = top;
                (*sub_it)->x_Index(*this);
            }
        }
        break;
    }

    default:
        NCBI_THROW(CException, eUnknown,
                   "unknown assembly set type");
    }
}


//////////////////////////////////////////////////////////////////////////////

void CGC_Assembly::CreateIndex()
{
    if (m_SequenceMap.empty()) {
        CMutexGuard LOCK(m_Mutex);
        if (m_SequenceMap.empty()) {
            CTypeConstIterator<CGC_Sequence> seq_it(*this);
            for ( ;  seq_it;  ++seq_it) {
                const CGC_Sequence& this_seq = *seq_it;
                CConstRef<CGC_Replicon> repl = this_seq.GetReplicon();

                /// bizarre pattern: the sequence is a single placed sequence
                /// with itself as the only scaffold.  if this is the case,
                /// don't index the scaffold
                if (repl  &&
                    repl->GetSequence().IsSingle()  &&
                    &repl->GetSequence().GetSingle() != &this_seq) {
                    const CGC_Sequence& repl_seq =
                        repl->GetSequence().GetSingle();
                    if (repl_seq.IsSetSequences()  &&
                        repl_seq.GetSequences().size() == 1  &&
                        repl_seq.GetSequences().front()->GetState() == CGC_TaggedSequences::eState_placed  &&
                        repl_seq.GetSequences().front()->GetSeqs().size() == 1  &&
                        repl_seq.GetSequences().front()->GetSeqs().front() == &this_seq  &&
                        repl->GetSequence().GetSingle().GetSeq_id()
                        .Match(this_seq.GetSeq_id())) {
                        continue;
                    }
                }

                m_SequenceMap[CSeq_id_Handle::GetHandle(seq_it->GetSeq_id())]
                    .push_back(CConstRef<CGC_Sequence>(&*seq_it));

                // don't forget to index aliases
                if (this_seq.IsSetSeq_id_synonyms()) {
                    set<CSeq_id_Handle> these_ids;
                    these_ids.insert(CSeq_id_Handle::GetHandle(seq_it->GetSeq_id()));

                    ITERATE (CGC_Sequence::TSeq_id_synonyms, syn_it,
                             this_seq.GetSeq_id_synonyms()) {
                        for (CTypeConstIterator<CSeq_id> id_it(**syn_it);
                             id_it;  ++id_it) {
                            CSeq_id_Handle idh =
                                CSeq_id_Handle::GetHandle(*id_it);
                            if (these_ids.insert(idh).second) {
                                m_SequenceMap[CSeq_id_Handle::GetHandle(*id_it)]
                                    .push_back(CConstRef<CGC_Sequence>(&*seq_it));
                            }
                        }
                    }
                }
            }
        }
    }
}


/// Record @a root as the owning assembly throughout this assembly.
///
/// For a unit: sets the unit's assembly up-link, then indexes every
/// replicon in mols and every sequence in other_sequences against both the
/// root assembly and the unit (the latter also get their parent relation
/// from the enclosing tagged list).  For an assembly set: recurses into the
/// primary assembly and every additional assembly.
void CGC_Assembly::x_Index(CGC_Assembly& root)
{
    if (!IsUnit()) {
        if (IsAssembly_set()) {
            CGC_AssemblySet& asm_set = SetAssembly_set();
            asm_set.SetPrimary_assembly().x_Index(root);
            if (asm_set.IsSetMore_assemblies()) {
                NON_CONST_ITERATE (CGC_AssemblySet::TMore_assemblies, sub_it,
                                   asm_set.SetMore_assemblies()) {
                    (*sub_it)->x_Index(root);
                }
            }
        }
        return;
    }

    SetUnit().m_Assembly = &root;

    if (GetUnit().IsSetMols()) {
        NON_CONST_ITERATE (CGC_AssemblyUnit::TMols, mol_it,
                           SetUnit().SetMols()) {
            x_Index(root, **mol_it);
            x_Index(SetUnit(), **mol_it);
        }
    }

    if (GetUnit().IsSetOther_sequences()) {
        NON_CONST_ITERATE (CGC_AssemblyUnit::TOther_sequences, tagged_it,
                           SetUnit().SetOther_sequences()) {
            NON_CONST_ITERATE (CGC_TaggedSequences::TSeqs, seq_it,
                               (*tagged_it)->SetSeqs()) {
                x_Index(root, **seq_it);
                x_Index(SetUnit(), **seq_it);
                // These sequences have no parent sequence; only the
                // relation from the tagged list is recorded.
                x_Index(**seq_it, (*tagged_it)->GetState());
            }
        }
    }
}


/// Set @a assm as the owning assembly of @a replicon and of every sequence
/// it holds (either its single sequence or each member of its set).
void CGC_Assembly::x_Index(CGC_Assembly& assm, CGC_Replicon& replicon)
{
    replicon.m_Assembly = &assm;

    if (!replicon.GetSequence().IsSingle()) {
        NON_CONST_ITERATE (CGC_Replicon::TSequence::TSet, member_it,
                           replicon.SetSequence().SetSet()) {
            CGC_Sequence& member = **member_it;
            x_Index(assm, member);
        }
        return;
    }

    CGC_Sequence& single = replicon.SetSequence().SetSingle();
    x_Index(assm, single);
}


/// Set @a assm as the owning assembly of @a seq and, recursively, of all
/// sequences nested beneath it.
void CGC_Assembly::x_Index(CGC_Assembly& assm, CGC_Sequence& seq)
{
    seq.m_Assembly = &assm;

    if (!seq.IsSetSequences()) {
        return;
    }
    NON_CONST_ITERATE (CGC_Sequence::TSequences, tagged_it,
                       seq.SetSequences()) {
        NON_CONST_ITERATE (CGC_TaggedSequences::TSeqs, child_it,
                           (*tagged_it)->SetSeqs()) {
            x_Index(assm, **child_it);
        }
    }
}


/// Set @a unit as the owning assembly unit of @a replicon and index each
/// sequence the replicon holds against both the unit and the replicon.
/// Sequences held directly by the replicon get the "placed" parent
/// relation.
void CGC_Assembly::x_Index(CGC_AssemblyUnit& unit, CGC_Replicon& replicon)
{
    replicon.m_AssemblyUnit = &unit;

    if (!replicon.GetSequence().IsSingle()) {
        NON_CONST_ITERATE (CGC_Replicon::TSequence::TSet, member_it,
                           replicon.SetSequence().SetSet()) {
            CGC_Sequence& member = **member_it;
            member.m_ParentRel = CGC_TaggedSequences::eState_placed;

            x_Index(unit,     member);
            x_Index(replicon, member);
        }
        return;
    }

    CGC_Sequence& single = replicon.SetSequence().SetSingle();
    single.m_ParentRel = CGC_TaggedSequences::eState_placed;

    x_Index(unit,     single);
    x_Index(replicon, single);
}


/// Set @a unit as the owning assembly unit of @a seq and of all nested
/// sequences; each direct child is also linked to @a seq as its parent,
/// using the state of the tagged list it belongs to as the relation.
void CGC_Assembly::x_Index(CGC_AssemblyUnit& unit, CGC_Sequence& seq)
{
    seq.m_AssemblyUnit = &unit;

    if (!seq.IsSetSequences()) {
        return;
    }
    NON_CONST_ITERATE (CGC_Sequence::TSequences, tagged_it,
                       seq.SetSequences()) {
        NON_CONST_ITERATE (CGC_TaggedSequences::TSeqs, child_it,
                           (*tagged_it)->SetSeqs()) {
            x_Index(unit, **child_it);
            x_Index(seq, **child_it, (*tagged_it)->GetState());
        }
    }
}


/// Set @a replicon as the owning replicon of @a seq and, recursively, of
/// all sequences nested beneath it.
void CGC_Assembly::x_Index(CGC_Replicon& replicon, CGC_Sequence& seq)
{
    seq.m_Replicon = &replicon;

    if (!seq.IsSetSequences()) {
        return;
    }
    NON_CONST_ITERATE (CGC_Sequence::TSequences, tagged_it,
                       seq.SetSequences()) {
        NON_CONST_ITERATE (CGC_TaggedSequences::TSeqs, child_it,
                           (*tagged_it)->SetSeqs()) {
            x_Index(replicon, **child_it);
        }
    }
}


/// Link @a seq to @a parent with the given @a relation, then link every
/// nested sequence to @a seq using the state of its tagged list.
void CGC_Assembly::x_Index(CGC_Sequence& parent, CGC_Sequence& seq,
                           CGC_TaggedSequences::TState relation)
{
    seq.m_ParentSequence = &parent;
    seq.m_ParentRel = relation;

    if (!seq.IsSetSequences()) {
        return;
    }
    NON_CONST_ITERATE (CGC_Sequence::TSequences, tagged_it,
                       seq.SetSequences()) {
        NON_CONST_ITERATE (CGC_TaggedSequences::TSeqs, child_it,
                           (*tagged_it)->SetSeqs()) {
            x_Index(seq, **child_it, (*tagged_it)->GetState());
        }
    }
}

/// Mark @a seq as having no parent sequence while recording @a relation,
/// then link every nested sequence to @a seq using the state of its tagged
/// list.
void CGC_Assembly::x_Index(CGC_Sequence& seq,
                           CGC_TaggedSequences::TState relation)
{
    seq.m_ParentSequence = NULL;
    seq.m_ParentRel = relation;

    if (!seq.IsSetSequences()) {
        return;
    }
    NON_CONST_ITERATE (CGC_Sequence::TSequences, tagged_it,
                       seq.SetSequences()) {
        NON_CONST_ITERATE (CGC_TaggedSequences::TSeqs, child_it,
                           (*tagged_it)->SetSeqs()) {
            x_Index(seq, **child_it, (*tagged_it)->GetState());
        }
    }
}



/////////////////////////////////////////////////////////////////////////////
///
/// Molecule Extraction Routines
///

// Forward declarations: the list-based overloads call one another
// (assembly -> set -> assembly), so they must be declared before their
// definitions below.

/// Append the sequences of @a assm that match @a subset to @a molecules.
static void s_Extract(const CGC_Assembly& assm,
                      list< CConstRef<CGC_Sequence> >& molecules,
                      CGC_Assembly::ESubset subset);

/// Append the sequences of @a unit that match @a subset to @a molecules.
static void s_Extract(const CGC_AssemblyUnit& unit,
                      list< CConstRef<CGC_Sequence> >& molecules,
                      CGC_Assembly::ESubset subset);

/// Append the sequences of every assembly in @a set that match @a subset
/// to @a molecules.
static void s_Extract(const CGC_AssemblySet& set,
                      list< CConstRef<CGC_Sequence> >& molecules,
                      CGC_Assembly::ESubset subset);

/// Tell whether a sequence role value is the one selected by @a subset.
///
/// @throws CException (eUnknown) for a subset that has no corresponding
///         single role (anything other than chromosome, scaffold,
///         component, top-level or submitter pseudo-scaffold).
static bool s_RoleFitsSubset(int role, CGC_Assembly::ESubset subset)
{
    int wanted_role;
    switch (subset) {
    case CGC_Assembly::eChromosome:
        wanted_role = eGC_SequenceRole_chromosome;
        break;

    case CGC_Assembly::eScaffold:
        wanted_role = eGC_SequenceRole_scaffold;
        break;

    case CGC_Assembly::eComponent:
        wanted_role = eGC_SequenceRole_component;
        break;

    case CGC_Assembly::eTopLevel:
        wanted_role = eGC_SequenceRole_top_level;
        break;

    case CGC_Assembly::eSubmitterPseudoScaffold:
        wanted_role = eGC_SequenceRole_submitter_pseudo_scaffold;
        break;

    default:
        NCBI_THROW(CException, eUnknown,
                   "Unexpected subset in call to CGC_Assembly::GetMolecules()");
    }
    return role == wanted_role;
}

/// Append to @a molecules every sequence found under @a unit that matches
/// @a subset.
///
/// With CGC_Assembly::eAll every sequence is taken; otherwise a sequence is
/// taken when at least one of its roles fits the subset.
///
/// @throws CException (eUnknown) after the scan if any sequence identified
///         by a GI has no roles set.  Sequences appended before the throw
///         remain in @a molecules.
static void s_Extract(const CGC_AssemblyUnit& unit,
                      list< CConstRef<CGC_Sequence> >& molecules,
                      CGC_Assembly::ESubset subset)
{
    bool invalid_data = false;
    CTypeConstIterator<CGC_Sequence> sequence_it(unit);
    for ( ;  sequence_it;  ++sequence_it) {
        const bool has_roles = sequence_it->IsSetRoles();
        if (sequence_it->GetSeq_id().IsGi()  &&  !has_roles) {
            invalid_data = true;
        }

        // Include this sequence if it has the correct role, or if
        // all sequences are requested
        bool fits_role = false;
        if (subset == CGC_Assembly::eAll) {
            fits_role = true;
        }
        else if (has_roles) {
            ITERATE (CGC_Sequence::TRoles, it, sequence_it->GetRoles()) {
                if (s_RoleFitsSubset(*it, subset)) {
                    fits_role = true;
                    break;
                }
            }
        }
        if (fits_role) {
            molecules.push_back(CConstRef<CGC_Sequence>(&*sequence_it));
        }
    }

    if (invalid_data) {
        NCBI_THROW(CException, eUnknown,
                   "GC-Sequence.roles is not set in the current assembly; "
                   "please re-extract GC-Assembly");
    }
}


static void s_Extract(const CGC_AssemblySet& set,
                      list< CConstRef<CGC_Sequence> >& molecules,
                      CGC_Assembly::ESubset subset)
{
    s_Extract(set.GetPrimary_assembly(), molecules, subset);
    if (set.IsSetMore_assemblies()) {
        ITERATE (CGC_AssemblySet::TMore_assemblies, it,
                 set.GetMore_assemblies()) {
            s_Extract(**it, molecules, subset);
        }
    }
}


static void s_Extract(const CGC_AssemblySet& set,
                      vector< list< CConstRef<CGC_Sequence> > >& molecules,
                      CGC_Assembly::ESubset subset)
{
    molecules.clear();
    molecules.resize(set.IsSetMore_assemblies()
         ? set.GetMore_assemblies().size() + 1 : 1);
    vector< list< CConstRef<CGC_Sequence> > >::iterator unit_it = molecules.begin();
    s_Extract(set.GetPrimary_assembly(), *unit_it++, subset);
    if (set.IsSetMore_assemblies()) {
        ITERATE (CGC_AssemblySet::TMore_assemblies, it,
                 set.GetMore_assemblies()) {
            s_Extract(**it, *unit_it++, subset);
        }
    }
}


/// Append the matching sequences of @a assm to @a molecules, dispatching to
/// the unit overload for a unit and to the assembly-set overload otherwise.
static void s_Extract(const CGC_Assembly& assm,
                      list< CConstRef<CGC_Sequence> >& molecules,
                      CGC_Assembly::ESubset subset)
{
    if (!assm.IsUnit()) {
        s_Extract(assm.GetAssembly_set(), molecules, subset);
        return;
    }
    s_Extract(assm.GetUnit(), molecules, subset);
}


static void s_Extract(const CGC_Assembly& assm,
                      vector< list< CConstRef<CGC_Sequence> > >& molecules,
                      CGC_Assembly::ESubset subset)
{
    if (assm.IsUnit()) {
        molecules.resize(1);
        molecules.front().clear();
        s_Extract(assm.GetUnit(), molecules.front(), subset);
    } else {
        s_Extract(assm.GetAssembly_set(), molecules, subset);
    }
}


/// Append to @a molecules the sequences of this assembly that match
/// @a subset.  Delegates to the file-local s_Extract() helpers, which do
/// not clear the list first.
void CGC_Assembly::GetMolecules(list< CConstRef<CGC_Sequence> >& molecules,
                                ESubset subset) const
{
    s_Extract(*this, molecules, subset);
}

/// Fill @a molecules with separate lists of the sequences matching
/// @a subset: a single list when this assembly is a unit, otherwise one
/// list for the primary assembly followed by one per additional assembly.
/// Delegates to the vector overload of s_Extract().
void CGC_Assembly::GetMoleculesByUnit(vector<TSequenceList>& molecules,
                            ESubset        subset) const
{
    s_Extract(*this, molecules, subset);
}

/// Return the target set recorded by CreateHierarchy(), wrapped in a
/// CConstRef built from m_TargetSet.
/// NOTE(review): m_TargetSet is NULL until CreateHierarchy() has run, so
/// the result is presumably a null reference in that case — confirm.
CConstRef<CGC_Assembly> CGC_Assembly::GetTargetSet() const
{
    return CConstRef<CGC_Assembly>(m_TargetSet);
}

/// Tell whether this full assembly is the reference of its target set.
///
/// - A unit whose full assembly is a different object defers to that full
///   assembly.
/// - An assembly set of type assembly_set is itself a target set, so the
///   question is meaningless and an exception is thrown.
/// - Otherwise this is a full assembly: it is the reference when it is the
///   target set itself or the target set's primary assembly.
///
/// @throws CException (eUnknown) when called on a target set, or when the
///         target set has not been assigned yet (CreateHierarchy() has not
///         run), rather than dereferencing a null pointer.
bool CGC_Assembly::IsTargetSetReference() const
{
    if (IsUnit() && GetUnit().GetFullAssembly().GetPointer() != this) {
        /// Assembly unit which is part of a multi-unit assembly
        return GetUnit().GetFullAssembly()->IsTargetSetReference();
    } else if (IsAssembly_set() && GetAssembly_set().GetSet_type() ==
                                   CGC_AssemblySet::eSet_type_assembly_set)
    {
        NCBI_THROW(CException, eUnknown,
                   "IsTargetSetReference() called on target set");
    } else {
        /// Full assembly
        if (!m_TargetSet) {
            // m_TargetSet is NULL from construction until CreateHierarchy()
            // assigns it.
            NCBI_THROW(CException, eUnknown,
                       "IsTargetSetReference() called before "
                       "CreateHierarchy()");
        }
        return m_TargetSet == this ||
               &m_TargetSet->GetAssembly_set().GetPrimary_assembly()
                     == this;
    }
}


END_objects_SCOPE // namespace ncbi::objects::

END_NCBI_SCOPE

/* Original file checksum: lines: 57, chars: 1758, CRC32: 382c4e0c */
